Basic libraries for dealing with data:
library(plotly) # interactive graphs
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(RColorBrewer) # make new color sets
library(tidyverse) # easy manipulation of dataframes
## ── Attaching packages ──────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble 1.4.2 ✔ purrr 0.2.5
## ✔ tidyr 0.8.2 ✔ dplyr 0.7.8
## ✔ readr 1.3.1 ✔ stringr 1.3.1
## ✔ tibble 1.4.2 ✔ forcats 0.3.0
## ── Conflicts ─────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks plotly::filter(), stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(DT) # generate interactive html tables
library(rvest) # scrape web pages
## Loading required package: xml2
##
## Attaching package: 'rvest'
## The following object is masked from 'package:purrr':
##
## pluck
## The following object is masked from 'package:readr':
##
## guess_encoding
source("utils.R") # project-local utility functions (presumably provides join.path and data.dir used below; TODO confirm)
Some libraries for better handling of R Markdown:
library(DT) # interactive datatables
Lê dados de contas do governo
# Read the 2018 expenses-by-function CSV (states file, per its name).
# read_csv2 parses ';'-separated files that use ',' as the decimal mark.
despesas.funcao.2018 <- read_csv2(
  file = join.path(
    data.dir, "tesouro-csv", "2018_Despesas-Funcao-ESTADOS.csv"
  ),
  skip = 3, # ignore the first three lines of the file
  locale = locale(encoding = "ISO-8859-15"),
  col_types = cols(
    # the two numeric columns are parsed as numbers ...
    "População" = col_number(),
    Valor = col_number(),
    # ... and every remaining column becomes a factor whose levels are
    # taken from the data
    .default = col_factor(NULL)
  )
)
## Using ',' as decimal and '.' as grouping mark. Use read_delim() for more control.
# Show the expenses table as an interactive widget with column filters on top.
datatable(data = despesas.funcao.2018, filter = "top")
Lê os dados do IDEB do 4º–5º e do 8º–9º anos de cada estado, já convertidos de HTML para CSV:
# Read the CSV that was extracted from the HTML source
# (the "45ano" file, i.e. 4th-5th grade per the file name).
estados45ano.df <- read_csv2(
  file = join.path(data.dir, "ideb/ideb-csv/ideb-estados-45ano.csv")
)
## Using ',' as decimal and '.' as grouping mark. Use read_delim() for more control.
## Parsed with column specification:
## cols(
## Estado = col_character(),
## Ano = col_double(),
## Ideb = col_double(),
## Meta = col_double()
## )
# Read the per-state IDEB CSV for the "89ano" file
# (8th-9th grade, judging by the file name).
estados89ano.df <- read_csv2(
  file = join.path(data.dir, "ideb/ideb-csv/ideb-estados-89ano.csv")
)
## Using ',' as decimal and '.' as grouping mark. Use read_delim() for more control.
## Parsed with column specification:
## cols(
## Estado = col_character(),
## Ano = col_double(),
## Ideb = col_double(),
## Meta = col_double()
## )
# Interactive table of the 4th-5th grade IDEB data: horizontal scrolling,
# no row-name column, column filters on top.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
datatable(
  estados45ano.df,
  options = list(scrollX = TRUE),
  rownames = FALSE,
  filter = "top"
)
# Interactive table of the expenses data, five rows per page, with horizontal
# scrolling, no row-name column and column filters on top.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
datatable(
  despesas.funcao.2018,
  rownames = FALSE,
  filter = "top",
  options = list(pageLength = 5, scrollX = TRUE)
)
# List every distinct value of the Conta column in an interactive table.
# distinct() replaces select() + unique(); FALSE replaces the reassignable F.
despesas.funcao.2018 %>%
  distinct(Conta) %>%
  datatable(rownames = FALSE, filter = "top")
# Bar chart of the population of each UF.
# The expenses table repeats UF/População on every expense row, and bars drawn
# from raw values are stacked, so plotting the rows directly would show the
# population multiplied by the number of rows of each UF. Keep one row per
# UF/População pair before plotting.
despesas.funcao.2018 %>%
  distinct(UF, População) %>%
  ggplot(aes(x = UF, y = População)) +
  geom_col()
É aplicado um filtro para calcular, por UF, a soma total dos investimentos em educação de todo tipo (contas cujo código inicia com “12”).
# Total education spending per UF: keep the accounts whose code starts with
# "12" ("12", "12.1234", etc.), add up Valor within each UF and draw one bar
# per UF.
# NOTE(review): if Conta holds both a function total ("12") and its
# sub-functions ("12.xxx"), this sum counts the same spending more than once;
# confirm against the data.
despesas.funcao.2018 %>%
  filter(grepl("^12", as.character(Conta))) %>%
  group_by(UF) %>%
  summarise(InvEduc = sum(Valor)) %>%
  ggplot(mapping = aes(x = UF, y = InvEduc)) +
  geom_bar(stat = "identity")
# Sum Valor per UF over the accounts whose code starts with "12" and plot the
# totals as bars (one bar per UF).
despesas.funcao.2018 %>%
  dplyr::filter(startsWith(as.character(Conta), "12")) %>%
  group_by(UF) %>%
  summarise(InvEduc = sum(Valor)) %>%
  ggplot() +
  geom_col(aes(x = UF, y = InvEduc))